library(ggplot2)
library(tidyverse)
library(texreg)
library(marginaleffects)
library(gridExtra)
library(estimatr)
library(haven)

# Set paths for figure and table output
figurespath <- "."
tablesspath <- "."

# Load dataset
data <- read_csv("institutionlevel.csv")
data$has_polisci_division <- ifelse(data$num_polisci_divisions>0, 1, 0)
data$has_poliscistrict_division <- ifelse(data$num_poliscistrict_divisions>0, 1, 0)
data$has_irstrict_division <- ifelse(data$num_irstrict_divisions>0, 1, 0)
data$has_civicsstrict_division <- ifelse(data$num_civicsstrict_divisions>0, 1, 0)
data$has_sociology_division <- ifelse(data$num_sociology_divisions>0, 1, 0)
data$num_divisions_log <- log10(data$num_divisions)
data$public <- as.factor(data$public)
data$age <- 2022-data$founding_year
data <- data %>% filter(!is.na(distrank))
data <- data %>% filter(!is.na(v2x_polyarchy))

# Generate three groups of regimes
ctr <- data %>% select(cowcode, v2x_polyarchy) %>% distinct(cowcode, v2x_polyarchy)
ctr$bin <- cut(ctr$v2x_polyarchy, quantile(ctr$v2x_polyarchy, prob = seq(0, 1, length.out = 4), na.rm=T), include.lowest = TRUE, labels=F)
mid <- ctr %>% group_by(bin) %>%  summarize(mid=mean(v2x_polyarchy)) 
ctr <- ctr %>% select(-v2x_polyarchy)
data <- data %>% inner_join(ctr)

# Export for separate analysis in Stata
data %>% write_dta("institutionlevel.dta")

# Plot: Map of institutions with PS divisions
# NOTE: Since the latitude/longitude data had to be stripped from the replication data, this step cannot be replicated with the dataset provided. 
library(maps)
psinstitutions <- data %>% filter(num_polisci_divisions>0)
psmap <- ggplot() + coord_fixed() + xlab("") + ylab("") +
  geom_polygon(data=map_data("world"), aes(x=long, y=lat, group=group), colour="light grey", fill="light grey") +
  geom_point(data=psinstitutions, 
             aes(x=longitude, y=latitude), colour="blue", 
             fill="blue",pch=21, size=0.2, alpha=I(0.7)) +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.background = element_rect(fill = 'white', colour = 'white'), 
        axis.line = element_line(colour = "white"), legend.position="none",
        axis.ticks=element_blank(), axis.text.x=element_blank(),
        axis.text.y=element_blank())
psmap
ggsave(file.path(figurespath, "poliscimap.pdf"), height=6, width=10)


# Does Institution have a PS division? 
m1a <- lm_robust(has_polisci_division ~ as.factor(bin) + age + num_divisions_log, clusters = cowcode, data=data)
m1b <- lm_robust(has_polisci_division ~ as.factor(bin) + age + num_divisions_log + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)
m1c <- lm_robust(has_polisci_division ~ as.factor(bin) + age + num_divisions_log + as.factor(nationalpsa) + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)
m1d <- lm_robust(has_polisci_division ~ as.factor(bin) + age + num_divisions_log + as.factor(ipsa) + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)

# Create table in Appendix Section A.1
texreg(list(m1a, m1b, m1c, m1d), file=file.path(tablesspath, "m1.tex"), include.ci = FALSE, custom.coef.map=list("as.factor(bin)2" = "Hybrid", "as.factor(bin)3" = "Democratic",  "age" = "Age", "num_divisions_log" = "N.~divisions (log)", "log10(gdp_pc)" = "GDP p.c. (log)", "log10(ctrpop)" = "Country pop. (log)", "as.factor(nationalpsa)1" = "National PS assoc.", "as.factor(ipsa)1" = "IPSA member", "(Intercept)" = "(Intercept)"),  custom.gof.rows=list("Clustered SEs (country)" = c("Y", "Y", "Y", "Y")), omit.coef=c('cowcode'), table=F, stars = c(0.001, 0.01, 0.05), digits=3, booktabs = T, use.packages = FALSE)

# Create Figure 3 in the main article
plot1b <- plot_predictions(m1b, condition = "bin") + theme_bw() + xlab("Regime type") + ylab("") + scale_x_discrete(labels=c("1" = "Autocratic", "2" = "Hybrid", "3" = "Democratic")) #+ ylim(0.1, 0.4)
pdf(file.path(figurespath, "model1.pdf"), width=4, height=4)
grid.arrange(plot1b, nrow=1)
dev.off()

# Models with alternative definitions of PS
m1b_poliscistrict <- lm_robust(has_poliscistrict_division ~ as.factor(bin) + age + num_divisions_log + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)
m1b_irstrict <- lm_robust(has_irstrict_division ~ as.factor(bin) + age + num_divisions_log + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)
m1b_civicsstrict <- lm_robust(has_civicsstrict_division ~ as.factor(bin) + age + num_divisions_log + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)
m1b_sociology <- lm_robust(has_sociology_division ~ as.factor(bin) + age + num_divisions_log + log10(gdp_pc) + log10(ctrpop), clusters = cowcode, data=data)

# Create table in Appendix Section A.2
texreg(list(m1b_poliscistrict, m1b_irstrict, m1b_civicsstrict, m1b_sociology), file=file.path(tablesspath, "m1_strict.tex"), custom.model.names = c("PS only", "Govt., IR", "Civics, HR", "Sociology, Cult.~Studies"), include.ci = FALSE, custom.coef.map=list("as.factor(bin)2" = "Hybrid", "as.factor(bin)3" = "Democratic",  "age" = "Age", "num_divisions_log" = "N.~divisions (log)", "log10(gdp_pc)" = "GDP p.c. (log)", "log10(ctrpop)" = "Country pop. (log)", "(Intercept)" = "(Intercept)"),  custom.gof.rows=list("Clustered SEs (country)" = c("Y", "Y", "Y","Y")), omit.coef=c('cowcode'), table=F, stars = c(0.001, 0.01, 0.05), digits=3, booktabs = T, use.packages = FALSE)


# Models with country FEs
m2a <- lm_robust(has_polisci_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==1,])
m2b <- lm_robust(has_polisci_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==2,])
m2c <- lm_robust(has_polisci_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==3,])

# Create table in Appendix Section B.1
texreg(list(m2a, m2b, m2c), file=file.path(tablesspath, "m2.tex"), include.ci = FALSE, custom.coef.map=list("distrank" = "Rel.~dist.~to capital", "public1" = "Public inst.", "num_divisions_log" = "N.~divisions (log)", "(Intercept)" = "(Intercept)"), custom.model.names = c("Autocratic", "Hybrid", "Democratic"),  custom.gof.rows=list("Country FEs" = c("Y", "Y", "Y"), "Clustered SEs (country)" = c("Y", "Y", "Y")), omit.coef=c('cowcode'), table=F, stars = c(0.001, 0.01, 0.05), digits=3, booktabs = T, use.packages = FALSE)

# Create Figure 4 in the main article
distrank2a <- plot_predictions(m2a, condition = "distrank") + theme_bw() + labs(title="Autocracies", x="Relative distance to capital", y="Probability of having a PS division") + ylim(0.1, 0.5)
distrank2b <- plot_predictions(m2b, condition = "distrank") + theme_bw() + labs(title="Hybrid Regimes", x="Relative distance to capital", y="") + ylim(0.1, 0.5)
distrank2c <- plot_predictions(m2c, condition = "distrank") + theme_bw() + labs(title="Democracies", x="Relative distance to capital", y="") + ylim(0.1, 0.5)
pdf(file.path(figurespath, "model2_distrank.pdf"), width=12, height=4)
grid.arrange(distrank2a, distrank2b, distrank2c, nrow=1)
dev.off()

# Create Figure 5 in the main article
public2a <- plot_predictions(m2a, condition = "public") + theme_bw() + labs(title="Autocracies", x="", y="Probability of having a PS division") + scale_x_discrete(labels=c("0" = "Private", "1" = "Public")) + ylim(0.1, 0.5)
public2b <- plot_predictions(m2b, condition = "public") + theme_bw() + labs(title="Hybrid Regimes", x="", y="") + scale_x_discrete(labels=c("0" = "Private", "1" = "Public")) + ylim(0.1, 0.5)
public2c <- plot_predictions(m2c, condition = "public") + theme_bw() + labs(title="Democracies", x="", y="") + scale_x_discrete(labels=c("0" = "Private", "1" = "Public")) + ylim(0.1, 0.5)
pdf(file.path(figurespath, "model2_public.pdf"), width=12, height=4)
grid.arrange(public2a, public2b, public2c, nrow=1)
dev.off()

# Country FE models with different operationalizations of the DV: PoliSci strict
m2a_poliscistrict <- lm_robust(has_poliscistrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==1,])
m2b_poliscistrict <- lm_robust(has_poliscistrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==2,])
m2c_poliscistrict <- lm_robust(has_poliscistrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==3,])

# Create table in Appendix Section B.2.1
texreg(list(m2a_poliscistrict, m2b_poliscistrict, m2c_poliscistrict), file=file.path(tablesspath, "m2_poliscistrict.tex"), include.ci = FALSE, custom.coef.map=list("distrank" = "Rel.~dist.~to capital", "public1" = "Public inst.", "num_divisions_log" = "N.~divisions (log)", "(Intercept)" = "(Intercept)"), custom.model.names = c("Autocratic", "Hybrid", "Democratic"),  custom.gof.rows=list("Country FEs" = c("Y", "Y", "Y"), "Clustered SEs (country)" = c("Y", "Y", "Y")), omit.coef=c('cowcode'), table=F, stars = c(0.001, 0.01, 0.05), digits=3, booktabs = T, use.packages = FALSE)

# Country FE models with different operationalizations of the DV: IR/Govt strict
m2a_irstrict <- lm_robust(has_irstrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==1,])
m2b_irstrict <- lm_robust(has_irstrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==2,])
m2c_irstrict <- lm_robust(has_irstrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==3,])

# Create table in Appendix Section B.2.2
texreg(list(m2a_irstrict, m2b_irstrict, m2c_irstrict), file=file.path(tablesspath, "m2_irstrict.tex"), include.ci = FALSE, custom.coef.map=list("distrank" = "Rel.~dist.~to capital", "public1" = "Public inst.", "num_divisions_log" = "N.~divisions (log)", "(Intercept)" = "(Intercept)"), custom.model.names = c("Autocratic", "Hybrid", "Democratic"),  custom.gof.rows=list("Country FEs" = c("Y", "Y", "Y"), "Clustered SEs (country)" = c("Y", "Y", "Y")), omit.coef=c('cowcode'), table=F, stars = c(0.001, 0.01, 0.05), digits=3, booktabs = T, use.packages = FALSE)

# Country FE models with different operationalizations of the DV: Civics/HR strict
m2a_civicsstrict <- lm_robust(has_civicsstrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==1,])
m2b_civicsstrict <- lm_robust(has_civicsstrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==2,])
m2c_civicsstrict <- lm_robust(has_civicsstrict_division ~ distrank + public + num_divisions_log + as.factor(cowcode), clusters = cowcode, data=data[data$bin==3,])

# Create table in Appendix Section B.2.3
texreg(list(m2a_civicsstrict, m2b_civicsstrict, m2c_civicsstrict), file=file.path(tablesspath, "m2_civicsstrict.tex"), include.ci = FALSE, custom.coef.map=list("distrank" = "Rel.~dist.~to capital", "public1" = "Public inst.",  "num_divisions_log" = "N.~divisions (log)", "(Intercept)" = "(Intercept)"), custom.model.names = c("Autocratic", "Hybrid", "Democratic"),  custom.gof.rows=list("Country FEs" = c("Y", "Y", "Y"), "Clustered SEs (country)" = c("Y", "Y", "Y")), omit.coef=c('cowcode'), table=F, stars = c(0.001, 0.01, 0.05), digits=3, booktabs = T, use.packages = FALSE)



